Newer
Older
Digital_Repository / Repositories / Maps / Otago Eprints / Code_Perl_Scripts / Google / Network_Links_All_Zipped / Points.pl
#!/usr/bin/env perl
# Perl script to generate point placemarks (icons) showing the locations
# from which the Otago_eprints archive has been accessed
# Author Nigel Stanger
# Altered by Hayden Kane November 2007
use strict;
use DBI;
use Geo::IP;
use POSIX qw(floor);
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );

# Data source name and credentials for the MySQL statistics database.
my $dsn       = 'DBI:mysql:database=eprintstats;host=127.0.0.1';
my $user_name = 'eprintstatspub';
my $password  = 'public';

# Handles and per-row working state shared by the main script body.
my $connect;     # DBI database handle
my $query;       # SQL text of the usage query
my %types;
my %unmapped;    # hit counts, per view type, for IPs with no GeoIP record
my $stat;        # DBI statement handle
my $row;         # current result row (hash reference)
my $num_rows;    # number of rows reported by the statement handle
my $vtype;       # view type of the current row

# Geolocation database: the lookup handle and the data file it is opened from.
my $gi;
my $gidb = '/usr/local/share/GeoIP/GeoLiteCity.dat';

# Longitude adjustment lookup table. The size of a degree of longitude
# shrinks as we approach the pole, so we need to scale the size of the
# bars that we draw to compensate. The values are derived from simple
# trigonometry. One-degree latitude intervals are probably overkill, but
# they were easy to generate and are probably also easier to process (we
# just take the absolute value and round to the nearest integer).
#
# The table holds 91 entries, one per whole degree of latitude from 0 to 90;
# the last two entries are identical. Entry 60 is approximately 2, which is
# consistent with a 1/cos(latitude) scale factor -- TODO confirm derivation.
#
# NOTE(review): nothing in the visible part of this script reads this table;
# presumably it was carried over from the bar-drawing variant of the script.
# Confirm before removing.
my (@long_adjust) = (
					  1,           1.000149936, 1.000625031, 1.001401156,
					  1.002504822, 1.003938193, 1.005527222, 1.007577665,
					  1.009814805, 1.012548385, 1.015451362, 1.018863602,
					  1.022455549, 1.026414998, 1.030723667, 1.035416774,
					  1.040342076, 1.045806114, 1.051493576, 1.057774136,
					  1.064328263, 1.071308119, 1.07855449,  1.086459634,
					  1.094691064, 1.103413101, 1.112646298, 1.122444401,
					  1.132640933, 1.143416279, 1.154798315, 1.166647234,
					  1.179193306, 1.192438327, 1.206420497, 1.220995149,
					  1.236155295, 1.252205744, 1.269161249, 1.286871805,
					  1.30559452,  1.325133265, 1.34585379,  1.367512898,
					  1.390218486, 1.414290258, 1.439883436, 1.466366234,
					  1.494622451, 1.52427924,  1.555922715, 1.589286423,
					  1.624573794, 1.661877673, 1.701368815, 1.743769606,
					  1.788337802, 1.836254359, 1.887432209, 1.94210986,
					  2.000049973, 2.062828574, 2.130469499, 2.203424356,
					  2.281552845, 2.366264633, 2.459472746, 2.560324974,
					  2.669801881, 2.7913935,   2.924802689, 3.072783109,
					  3.236797412, 3.421061629, 3.629874841, 3.865462623,
					  4.133753357, 4.448482828, 4.811613369, 5.242729893,
					  5.759533746, 6.395493768, 7.190621631, 8.209846154,
					  9.579463858, 11.47777459, 14.35030477, 19.13145315,
					  28.71090387, 57.42180775, 57.42180775,
);

# Working storage for the aggregation pass.
my %cities;    # per-location totals, keyed by the location key built per row
my %IPs;       # set of IP addresses seen in the query results

# Row limit for the query; a value below 1 means no LIMIT clause is added.
my $num_entries = -1;
my $num_hits    = 0;

# Per-row scratch variables.
my ( $ip, $count, $location, $country );
my ( $year, $month );
my $lat  = 0;
my $long = 0;
my $city = '';
my $key  = '';
my ( $maxcolour, $red, $blue );

# Start both unmapped-hit counters at zero.
$unmapped{$_} = 0 for qw(abstract download);

# A degree of latitude is always of constant length, so define it globally.
# The length of a degree of longitude changes depending on the latitude.
my $latsize = 0.075;

# Open the GeoIP city database; give up if it cannot be opened.
$gi = Geo::IP->open( $gidb, GEOIP_STANDARD );
die "Unable to open GeoIP database $gidb\n" unless $gi;

# Connect to MySQL with the connection variables set above. RaiseError makes
# DBI throw on any database error instead of returning a failure status.
$connect = DBI->connect( $dsn, $user_name, $password, { RaiseError => 1 } );

# Build the usage query for the otago_eprints archive: one row per
# (ip, view_type, country_name) with its hit count, busiest first. A LIMIT
# clause is appended only when a positive row limit has been configured.
my $limit_clause = '';
$limit_clause = " LIMIT $num_entries" if $num_entries > 0;
$query = join(
	"\n\t ",
	'SELECT ip, view_type, country_name, COUNT(*) AS count',
	'FROM view',
	q{WHERE archive_name = 'otago_eprints' AND country_code NOT LIKE 'X@%'},
	'GROUP BY ip, view_type, country_name',
	'ORDER BY count DESC',
) . $limit_clause;

# Run the query and note how many rows the statement handle reports.
$stat = $connect->prepare($query);
$stat->execute();
$num_rows = $stat->rows();

# If the query returned any rows, aggregate them per location and write the
# result to Points.kml. Nothing is written when there are no rows.
if ( $num_rows > 0 ) {
	$num_entries = $num_rows if ( $num_entries < 1 );

	# Each row is one (ip, view_type, country_name) group and its hit count.
	while ( $row = $stat->fetchrow_hashref() ) {

		# Extract values from the query result.
		$ip      = $row->{'ip'};
		$count   = $row->{'count'};
		$country = $row->{'country_name'};

		# Change the country value to New Zealand if either of the below
		# alternatives appears as the value for country.
		$country = 'New Zealand'
		  if (    ( $country eq 'Otago Intranet' )
			   || ( $country eq 'Repository Admin' ) );
		$vtype = $row->{'view_type'};
		$IPs{$ip} = 1;

		# Get the location of the archive use by ip address
		$location = $gi->record_by_addr($ip);

		# Extract values from the location result
		if ( defined($location) ) {
			$lat  = $location->latitude;
			$long = $location->longitude;

			# Create the key under which this row is accumulated. Locations
			# without a city name get a key starting with "!" (which no
			# identified location uses) so that they can be told apart when
			# the placemarks are written out.
			if ( $location->city eq '' ) {
				$city = $country . ' (unidentified)';
				$key = sprintf( "!%s (%f, %f)", $country, $lat, $long );
			} else {
				$city = $location->city;
				$key = sprintf( "%s %s", $city, $country );
			}

			# If there are multiple points for the same city in
			# the same country, we accumulate the lats and longs for these
			# points and keep a track of how many points there are in total,
			# so that we can work out an average latlong for the
			# city. Note that this assumes that each city name only exists
			# once within a country, i.e., it will break if there are
			# multiple cities with the same name in the same country!
			if ( !defined( $cities{$key} ) ) {
				$cities{$key}{'name'}       = $city;
				$cities{$key}{'lat'}        = 0;
				$cities{$key}{'long'}       = 0;
				$cities{$key}{'abstract'}   = 0;
				$cities{$key}{'download'}   = 0;
				$cities{$key}{'num_points'} = 0;
			}
			$cities{$key}{'lat'}  += $lat;
			$cities{$key}{'long'} += $long;
			$cities{$key}{$vtype} += $count;
			$cities{$key}{'num_points'}++;
		} else {

			# To keep count of the unmapped accesses of the otago_eprints
			# Due to the limitations in the GEO Database that is being used
			# GeoLiteCity is said to be over 98% accurate on a country level
			# and 70% accurate on a city level
			# Not used in this program but is here for future development if
			# necessary
			$unmapped{$vtype} += $count;
		}
	}

	# Average the location of multiple points for the same city. Every
	# accumulated row counts once, regardless of how many hits it represents.
	foreach $city ( keys %cities ) {
		$cities{$city}{'lat'} =
		  $cities{$city}{'lat'} / $cities{$city}{'num_points'};
		$cities{$city}{'long'} =
		  $cities{$city}{'long'} / $cities{$city}{'num_points'};
	}

	# Three-argument open keeps the mode separate from the file name, and the
	# result is checked so that a failure to create the file is reported here
	# instead of being silently ignored. The KML handle stays a package
	# filehandle because make_icon() and the later close(KML) refer to it.
	open( KML, '>', 'Points.kml' )
	  or die "Unable to open Points.kml for writing: $!\n";

	# Need to wait until we have all the counts before writing the data out.
	print KML '<?xml version="1.0"?>
<kml xmlns="http://earth.google.com/kml/2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://earth.google.com/kml/2.1 KMLSchema.xsd">
	<Folder>
    	<name>Points</name>
    	<description>Select this folder to display points from which the repository has been accessed</description>
		<Folder>
			<name>Locations (identified)</name>
			<description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only identified locations are displayed.</description>';

	# Plot known location icons as a separate folder so that we can turn them
	# on and off as a group.
	foreach $city ( sort( keys(%cities) ) ) {

		# Exclude unknown locations.
		if ( $city !~ /^!/ ) {
			make_icon(
					   $cities{$city}{'name'},     $cities{$city}{'download'},
					   $cities{$city}{'abstract'}, $cities{$city}{'lat'},
					   $cities{$city}{'long'}
			);
		}
	}
	print KML '
		</Folder>
		<Folder>
			<name>Locations (unidentified)</name>
			<description>Locations from which the repository has been accessed. The icon is colour-coded according to the relative proportion of abstract views (blue) versus document downloads (red). Only unidentified locations are displayed.</description>';

	# Plot unidentified location icons as a separate folder so that we can
	# turn them on and off as a group.
	foreach $city ( sort( keys(%cities) ) ) {

		# Exclude known locations.
		if ( $city =~ /^!/ ) {
			make_icon(
					   $cities{$city}{'name'},     $cities{$city}{'download'},
					   $cities{$city}{'abstract'}, $cities{$city}{'lat'},
					   $cities{$city}{'long'}
			);
		}
	}
	print KML '
		</Folder>
	</Folder>
</kml>
';
}

# Close off the connection to the database. This is done once only; the
# handles are not used again after this point.
$stat->finish();
$connect->disconnect();

# The KML handle is only opened when the query returned rows, so close (and
# check) it only when it is actually open. Buffered write errors surface at
# close time, which is why the result matters.
if ( defined fileno(KML) ) {
	close(KML) or die "Unable to close Points.kml: $!\n";
}

# Zip KML file to KMZ.
# NOTE(review): the fixed file names in the current directory mean that two
# concurrent runs would share the same temporary files -- consider File::Temp.
my $file = 'Points.kml';
die "No readable $file to package (did the query return any rows?)\n"
  unless -r $file;
my $zip = Archive::Zip->new();
$zip->addFile($file)
  or die "Unable to add $file to the KMZ archive\n";
my $status = $zip->writeToFileNamed('Points.kmz');
die "Unable to write Points.kmz (Archive::Zip status $status)\n"
  if $status != AZ_OK;

# Open the temporary Points.kmz file for reading.
open( my $kmz, '<', 'Points.kmz' )
  or die "Couldn't open Points.kmz for reading: $!\n";

# The archive is binary data: switch both handles to binary mode so that no
# newline translation is applied on platforms that distinguish text files.
binmode($kmz);
binmode(STDOUT);

# Print the header so that CGI will recognise it
print "Content-Type: application/vnd.google-earth.kmz\n\n";

# Stream the KMZ file in fixed-size chunks; reading a binary file "line by
# line" would split it at arbitrary newline bytes.
my $chunk;
while ( read( $kmz, $chunk, 8192 ) ) {
	print $chunk;
}
close($kmz);

# Remove the temporary files directly rather than through a shell.
unlink( 'Points.kml', 'Points.kmz' ) == 2
  or warn "Unable to remove temporary KML/KMZ files: $!\n";

# Round a number to the nearest whole number, with halves rounded away from
# zero (2.5 becomes 3, -2.5 becomes -3).
sub round {
	my ($value) = @_;

	# -1, 0 or +1 depending on the sign of the value; nudging by half a unit
	# in that direction lets int() truncation do the rounding.
	my $direction = $value <=> 0;
	my $nudged    = $value + $direction * 0.5;
	return int($nudged);
}

# Print one KML <Placemark> for a location to the KML filehandle.
#
# Parameters (positional):
#   $name     - display name of the location (XML-escaped before output)
#   $download - number of document downloads from this location
#   $abstract - number of abstract views from this location
#   $lat      - latitude of the placemark
#   $long     - longitude of the placemark
#
# The icon colour encodes the mix of hits: the download share of all hits is
# scaled to 0..255 for the red channel and the abstract share for the blue
# channel. Output goes to the package filehandle KML, which the caller must
# already have opened for writing. Returns the result of the print.
sub make_icon {
	my ( $name, $download, $abstract, $lat, $long ) = @_;

	# A location can end up with no downloads and no abstracts (for example
	# if its rows carried some other view type); avoid dividing by zero and
	# give it a zero colour component instead.
	my ($maxcolour) = $download + $abstract;
	my ($red)  = $maxcolour ? round( $download / $maxcolour * 255 ) : 0;
	my ($blue) = $maxcolour ? round( $abstract / $maxcolour * 255 ) : 0;

	# Escape XML markup characters so that a place name containing "&", "<"
	# or ">" cannot break the document. "&" must be handled first.
	my $xml_name = defined($name) ? $name : '';
	$xml_name =~ s/&/&amp;/g;
	$xml_name =~ s/</&lt;/g;
	$xml_name =~ s/>/&gt;/g;

	# The colour is written as alpha, blue, green, red (two hex digits each).
	print KML '
			<Placemark>
				<name>'
	  . $xml_name . '</name>
				<visibility>0</visibility>
				<description>
					<![CDATA[<span style="color:red;">'
	  . $download
	  . ' download'
	  . ( ( $download != 1 ) ? 's' : '' )
	  . '</span>, <span style="color:blue;">'
	  . $abstract
	  . ' abstract'
	  . ( ( $abstract != 1 ) ? 's' : '' )
	  . '</span>]]>
				</description>
				<LookAt>
					<longitude>'
	  . $long . '</longitude>
					<latitude>'
	  . $lat . '</latitude>
					<range>60000</range>
					<tilt>45</tilt>
					<heading>3</heading>
				</LookAt>
				<Style>
					<IconStyle>
						<color>'
	  . sprintf( 'ff%02x00%02x', $blue, $red ) . '</color>
						<Icon>
							<href>root://icons/palette-4.png</href>
						</Icon>
						<hotSpot x="0.5" y="0.5" xunits="fraction" yunits="fraction" />
					</IconStyle>
				</Style>
				<Point>
					<coordinates>'
	  . $long . ',' . $lat . ',0</coordinates>
				</Point>
			</Placemark>';
}